from scrapy.http import HtmlResponse
from scrapy.selector import Selector

# Inline HTML fixture used by the XPath queries below: a <head> holding a
# <base href> and a <title>, and a <div id='images'> containing five <a>
# links, each wrapping a "Name: Image N" text node, a <br/> and an <img>.
body = '''
<html>
<head>
<base href='http://example.com/' />
<title>Example website</title>
</head>
<body>
<div id='images'>
<a href='image1.html'>Name: Image 1 <br/><img src='image1.jpg' /></a>
<a href='image2.html'>Name: Image 2 <br/><img src='image2.jpg' /></a>
<a href='image3.html'>Name: Image 3 <br/><img src='image3.jpg' /></a>
<a href='image4.html'>Name: Image 4 <br/><img src='image4.jpg' /></a>
<a href='image5.html'>Name: Image 5 <br/><img src='image5.jpg' /></a>
</div>
</body>
</html>
'''
# Wrap the markup in an HtmlResponse so XPath queries can be run against it
# without performing a real HTTP request.
response = HtmlResponse(
    url='http://www.example.com',
    body=body,
    encoding='utf8',
)

# Absolute path to the document root; printing shows the selector list itself
# rather than extracted markup.
html = response.xpath('/html')
print(html)

# Element queries, printed as raw selector lists: two absolute paths, one
# document-wide descendant search (`//a`), and one descendant search scoped
# to <body> (`/html/body//img`).
for query in ('/html/head', '/html/body/div/a', '//a', '/html/body//img'):
    print(response.xpath(query))
# Text nodes that are direct children of any <a> element.
sel = response.xpath('//a/text()')

# extract() on the selector list yields one string per matched node, so the
# strings can be printed directly.
for anchor_text in sel.extract():
    print(anchor_text)

# `@src` selects the attribute node itself, so extraction yields the
# attribute values of every <img>.
src_list = response.xpath('//img/@src')
image_sources = src_list.extract()
for image_source in image_sources:
    print(image_source)

# `//@href` matches the href attribute of any element, so the <base> tag's
# href is included alongside those of the <a> links.
href_list = response.xpath('//@href')
for link_target in href_list.extract():
    print(link_target)

# `@*` is the attribute wildcard: every attribute of the <img> nested in an
# <a> that is the first <a> child of its parent.
attr_query = "//a[1]/img/@*"
all_attr_list = response.xpath(attr_query)
print(all_attr_list)

# A standalone fragment parsed directly with Selector (no response object).
text = '<a href="#">Click here to go to the <strong>Next Page</strong></a>'
sel = Selector(text=text)

# XPath string() gives the string-value of the first matched node, i.e. the
# concatenated text of the <a> including the nested <strong>; extract()
# returns it wrapped in a list.
link_text = sel.xpath('string(//a)')
print(link_text.extract())
